{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "import cv2\n",
    "import imutils\n",
    "import numpy as np\n",
    "import tarfile\n",
    "import shutil\n",
    "import os\n",
    "from tqdm import tqdm\n",
    "def mkdir(path):\n",
    "    \"\"\"Create directory `path` (with any missing parents) unless it already exists.\"\"\"\n",
    "    if os.path.exists(path):\n",
    "        return\n",
    "    os.makedirs(path)\n",
    "def rmdir(path):\n",
    "    \"\"\"Remove the directory tree at `path`; do nothing when `path` does not exist.\"\"\"\n",
    "    if not os.path.exists(path):\n",
    "        return\n",
    "    shutil.rmtree(path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "# 分类数据集\n",
    "数据集有数字和大小写字母，但是我们只需要0123456789和非数字。\n",
    "因此将 A~Z、a~z 的图片统一移到 A 的文件夹（Sample011），再将清空后的其他文件夹删除。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 51/51 [00:04<00:00, 10.92it/s]\n"
     ]
    }
   ],
   "source": [
    "# Merge the folders Sample012..Sample062 into Sample011, which then serves as\n",
    "# the single 'not a digit' class, and delete each folder once it is empty.\n",
    "fntroot = '/Users/wzq/Downloads/English/Fnt/'\n",
    "notnumdir = fntroot + 'Sample011/'\n",
    "for i in tqdm(range(12, 63)):\n",
    "    path = fntroot + 'Sample%03d/' % i\n",
    "    # A folder merged by an earlier run no longer exists; skip it so the cell\n",
    "    # can be re-run without os.listdir raising OSError.\n",
    "    if not os.path.isdir(path):\n",
    "        continue\n",
    "    for filename in os.listdir(path):\n",
    "        os.rename(path + filename, notnumdir + filename)\n",
    "    os.rmdir(path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "# 预处理所有图片\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/wzq/Downloads/LED/train/0/: 100%|██████████| 296/296 [00:02<00:00, 105.01it/s]\n",
      "/Users/wzq/Downloads/LED/train/1/: 100%|██████████| 266/266 [00:03<00:00, 83.76it/s]\n",
      "/Users/wzq/Downloads/LED/train/2/: 100%|██████████| 333/333 [00:03<00:00, 90.13it/s] \n",
      "/Users/wzq/Downloads/LED/train/3/: 100%|██████████| 288/288 [00:02<00:00, 97.31it/s] \n",
      "/Users/wzq/Downloads/LED/train/4/: 100%|██████████| 309/309 [00:02<00:00, 106.68it/s]\n",
      "/Users/wzq/Downloads/LED/train/5/: 100%|██████████| 334/334 [00:03<00:00, 106.00it/s]\n",
      "/Users/wzq/Downloads/LED/train/6/: 100%|██████████| 273/273 [00:02<00:00, 99.42it/s] \n",
      "/Users/wzq/Downloads/LED/train/7/: 100%|██████████| 313/313 [00:02<00:00, 107.27it/s]\n",
      "/Users/wzq/Downloads/LED/train/8/: 100%|██████████| 270/270 [00:02<00:00, 112.49it/s]\n"
     ]
    }
   ],
   "source": [
    "def resize(rawimg, size=28):  # fit img into a size*size canvas (28*28 by default)\n",
    "    \"\"\"Scale a single-channel image to fit a square canvas, centred on black.\n",
    "\n",
    "    The aspect ratio is kept: the longer side becomes `size` pixels and the\n",
    "    shorter side is padded evenly with zeros.\n",
    "\n",
    "    rawimg: 2-D image array (e.g. a grayscale image).\n",
    "    size:   side length of the square output, 28 by default.\n",
    "    Returns a (size, size) uint8 array.\n",
    "    \"\"\"\n",
    "    rawh, raww = rawimg.shape[:2]\n",
    "    scale = float(size) / max(rawh, raww)\n",
    "    # Pass an explicit target size clamped to [1, size]: with fx/fy alone a very\n",
    "    # elongated image can round to a 0-pixel side, which makes cv2.resize fail.\n",
    "    w = min(size, max(1, int(round(raww * scale))))\n",
    "    h = min(size, max(1, int(round(rawh * scale))))\n",
    "    img = cv2.resize(rawimg, (w, h), interpolation=cv2.INTER_CUBIC)\n",
    "    outimg = np.zeros((size, size), dtype=np.uint8)\n",
    "    # Floor division keeps the offsets integral on Python 3 as well as Python 2.\n",
    "    x = (size - w) // 2\n",
    "    y = (size - h) // 2\n",
    "    outimg[y:y+h, x:x+w] = img\n",
    "    return outimg\n",
    "\n",
    "def convert(img):\n",
    "    \"\"\"Turn an image into the grayscale array produced by resize().\n",
    "\n",
    "    img: BGR colour image (as returned by cv2.imread), or an image that is\n",
    "         already single-channel (2-D), which is used as-is.\n",
    "    Returns the result of resize() applied to the grayscale image.\n",
    "    \"\"\"\n",
    "    if img.ndim == 2:\n",
    "        # Already grayscale: COLOR_BGR2GRAY rejects a single-channel input.\n",
    "        gray = img\n",
    "    else:\n",
    "        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
    "    return resize(gray)\n",
    "\n",
    "\n",
    "def rotate(image, angle):\n",
    "    \"\"\"Rotate `image` by `angle` via imutils.rotate and return convert() of the result.\"\"\"\n",
    "    return convert(imutils.rotate(image, angle))\n",
    "\n",
    "# Augment each LED class: every source image is rotated from -20 to +20 degrees\n",
    "# in 1-degree steps, and each rotation is passed through rotate() and saved as a PNG.\n",
    "for i in range(0,9):\n",
    "    # NOTE(review): source folder i+1 is written to train folder i -- confirm that\n",
    "    # this offset matches the intended class labels.\n",
    "    path = '/Users/wzq/Downloads/LED/%d/' % (i+1)\n",
    "    trainpath = '/Users/wzq/Downloads/LED/train/%d/' % i\n",
    "    mkdir(trainpath)\n",
    "    j=0\n",
    "    skipped = []  # files that could not be read or processed\n",
    "    for filename in tqdm(os.listdir(path), desc=trainpath):\n",
    "        img = cv2.imread(path + filename)\n",
    "        if img is None:\n",
    "            # cv2.imread signals an undecodable file by returning None, not by raising.\n",
    "            skipped.append(filename)\n",
    "            continue\n",
    "        try:\n",
    "            for angle in np.arange(-20, 21, 1):\n",
    "                j+=1\n",
    "                cv2.imwrite(trainpath + \"img%(type)03d-%(num)05d.png\"%{'type':i,'num':j}, rotate(img,angle))\n",
    "        except Exception:\n",
    "            # Best effort: one bad image must not stop the run, but unlike a bare\n",
    "            # `except:` this lets KeyboardInterrupt through and records the file.\n",
    "            skipped.append(filename)\n",
    "    if skipped:\n",
    "        print('%s: skipped %d file(s): %s' % (path, len(skipped), ', '.join(skipped)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Move a random 10% of every class from train/ to valid/.\n",
    "trainroot = '/Users/wzq/Downloads/LED/train/'\n",
    "validroot = '/Users/wzq/Downloads/LED/valid/'\n",
    "# Walk the class folders that actually exist instead of a hard-coded range(1,11):\n",
    "# the preprocessing cell creates train/0 .. train/8, so train/9 would make\n",
    "# os.listdir raise and train/0 would never be split.\n",
    "for label in sorted(os.listdir(trainroot)):\n",
    "    trainpath = trainroot + label + '/'\n",
    "    if not os.path.isdir(trainpath):\n",
    "        continue\n",
    "    validpath = validroot + label + '/'\n",
    "    mkdir(validpath)\n",
    "    imgs = sorted(os.listdir(trainpath))\n",
    "    if os.listdir(validpath) or len(imgs) < 2:\n",
    "        # Either this class was split by an earlier run (splitting again would keep\n",
    "        # shrinking train/), or it has too few images to divide.\n",
    "        continue\n",
    "    # Sorted input plus a fixed seed makes the split reproducible.\n",
    "    trainimgs, validimgs = train_test_split(imgs, test_size=0.1, random_state=0)\n",
    "    for filename in validimgs:\n",
    "        os.rename(trainpath+filename, validpath+filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
